Packages used
library(ape)
library(ggtree)
## Warning: package 'ggtree' was built under R version 3.6.1
## Registered S3 method overwritten by 'treeio':
##   method     from
##   root.phylo ape
## ggtree v1.16.6  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
library(phangorn)
library(phytools)
## Loading required package: maps
## 
## Attaching package: 'phytools'
## The following object is masked from 'package:ggtree':
## 
##     read.newick
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(heatmaply)
## Loading required package: viridis
## Loading required package: viridisLite
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
## 
## ======================
## Welcome to heatmaply version 1.0.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## ======================
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.13.2
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:phytools':
## 
##     untangle
## The following object is masked from 'package:ggtree':
## 
##     rotate
## The following objects are masked from 'package:ape':
## 
##     ladderize, rotate
## The following object is masked from 'package:stats':
## 
##     cutree
library(vegan)
## Loading required package: permute
## 
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
## 
##     shuffle
## Loading required package: lattice
## Registered S3 methods overwritten by 'vegan':
##   method         from      
##   reorder.hclust seriation 
##   rev.hclust     dendextend
## This is vegan 2.5-6
## 
## Attaching package: 'vegan'
## The following objects are masked from 'package:phangorn':
## 
##     diversity, treedist
library(corrgram)
## 
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
## 
##     panel.fill
library(Hmisc)
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:plotly':
## 
##     subplot
## The following object is masked from 'package:ape':
## 
##     zoom
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(corrplot)
## corrplot 0.84 loaded
Input files for all analyses (individual chunks relist input files for that plot)
tree_rooted <- read.newick("input_files/ConcatAlnRooted.tre")
matrix_file <- read.table(file = "input_files/PresAbsSummary.txt", 
                          header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = "\t")
order_mat <- read.table(file = "input_files/reorder.txt", header = TRUE, row.names = 1, 
                        stringsAsFactors = FALSE, sep = "\t")
counts <- read.delim("input_files/cHGperGenome.txt")
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt", 
                         stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")

### uniprot_arcog.txt mined from www.uniprot.org and includes various annotations
arcog_plus <- read.delim("input_files/uniprot_arcog.txt", header=TRUE, row.names = NULL, com='', check.names=F)
Build heatmap with phylogenetic tree attached
tree_rooted <- read.newick("input_files/ConcatAlnRooted.tre")
matrix_file <- read.table(file = "input_files/PresAbsSummary.txt", 
                          header = TRUE, row.names = 1, stringsAsFactors = FALSE, sep = "\t")
order_mat <- read.table(file = "input_files/reorder.txt", header = TRUE, row.names = 1, 
                        stringsAsFactors = FALSE, sep = "\t")

intermediate_tree <- ggtree(tree_rooted, branch.length = T, ladderize = TRUE) + 
  geom_tiplab(size = 2.5, linetype = "dashed", linesize = 0.1, align = TRUE) + 
  geom_point2(aes(subset = as.numeric(label)>79 & !isTip), color = "firebrick", size = 1) + 
  geom_treescale(x = -0.01, y = -5, offset = 2.5) +
  theme(legend.position="none")
## Warning: Duplicated aesthetics after name standardisation: size
alpha <- gheatmap(intermediate_tree, matrix_file, offset = 0.7, width = 2.5, colnames = F) + 
  scale_fill_manual(values = c("lightgrey","black")) +
  theme(legend.position="none") +
  theme(text = element_text(size=20))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
bet <- get_heatmap_column_position(alpha, by = "bottom")
top <- get_heatmap_column_position(alpha, by = "top")

alpha + 
  geom_text(data = bet, aes(x, y, label = label), nudge_y = -0.5, size = 2.5, color = "red", angle = 90)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

ggsave("out_files/heattree.pdf", height = 20, width = 10, dpi = 300)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## optional interactive heatmap
x <- as.phylo(intermediate_tree)
x <- force.ultrametric(x, method = 'nnls')
row_dend  <- x %>% as.dendrogram 

heatmaply(order_mat, file = "out_files/heatmaply_plot.html", height = 1200, width = 400, fontsize_row = 4, 
          hide_colorbar = TRUE, dendrogram = 'row', Rowv = row_dend, margins = c(100,90))
## Warning: 'heatmap' objects don't have these attributes: 'showlegend'
## Valid attributes include:
## 'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
## Warning: 'heatmap' objects don't have these attributes: 'showlegend'
## Valid attributes include:
## 'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'heatmap' objects don't have these attributes: 'showlegend'
## Valid attributes include:
## 'type', 'visible', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Plot genomes per homologous group
### genome counts in bar plot
counts <- read.delim("input_files/cHGperGenome.txt")
ggplot(data = counts, aes(x = reorder(alpha_hg, -No..of.Genomes), y = No..of.Genomes)) +
  geom_bar(stat = "identity", color="black", fill="steelblue") +
  theme_bw() +
  xlab("cHG") +
  ylab("No. of genomes") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  scale_y_continuous(expand = c(0,0), limits = c(0,180), breaks = seq(0,180, by = 30)) +
  theme(axis.text.x = element_text(angle = 90))

ggsave("out_files/cHGperGenome.pdf", width = 6, height = 6, dpi = 500)
Plot rarefaction curve
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt", 
                         stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")

t_pan_matrix <- t(pan_matrix)
t_sp <- specaccum(t_pan_matrix,method="random",permutations=100)

#pdf('out_files/rarefaction_plot.pdf', width = 7, height = 7)   
par(las = 1)
plot(t_sp, ci.type = "poly", col = "blue",lwd = 2, ci.lty = 0, 
     ci.col = "lightblue", xlab = "Gene families", ylab= "No. of taxa")
boxplot(t_sp,col = "yellow", add = TRUE)

Plot spearman correlations
pan_matrix <- read.table(file = "input_files/PresAbsMatrixAnnot.txt", 
                         stringsAsFactors = TRUE, header = TRUE, row.names = 1, sep = "\t")

in_matrix <- as.matrix(pan_matrix)
spearman_rcorr <- rcorr(in_matrix, type = "spearman")
#write.table(spearman_rcorr$P, file = "out_files/spearman_p.txt", sep = "\t", row.names = TRUE, col.names = TRUE)
#write.table(spearman_rcorr$r, file = "out_files/spearman_r.txt", sep = "\t", row.names = TRUE, col.names = TRUE)

#### apply bonferroni correction for 48x48 pairwise comparisons
bonf_corr_p <- spearman_rcorr$P
spear_r <- spearman_rcorr$r
bonf_corr_p[is.na(bonf_corr_p)] <- 0
y <- function(x) ifelse(x*1128<1, x*1128, 1)
bonf_corr_p[] <- vapply(bonf_corr_p, y, numeric(1))
bonfer_matrix <- ifelse(bonf_corr_p<1, spear_r, 0)

#### output plot
pdf("out_files/corrplot.pdf", height = 6, width = 6)
corrplot(bonfer_matrix, type = "lower", order = "FPC", tl.cex = 0.5, tl.srt = 30, tl.col = "black")